1 Showcasing RRPlots

1.0.1 Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
layout(matrix(1:1, nrow=1))
source("~/Documents/GitHub/RISKPLOTS/CODE/auxplots.R")

1.0.2 Wisconsin Data Set

dataBreast <- read.csv("~/Documents/GitHub/RISKPLOTS/DATA/wpbc.data", header=FALSE)
table(dataBreast$V2)
## 
##   N   R 
## 151  47
rownames(dataBreast) <- dataBreast$V1
dataBreast$V1 <- NULL
dataBreast$status <- 1*(dataBreast$V2=="R")
dataBreast$V2 <- NULL
dataBreast$time <- dataBreast$V3
dataBreast$V3 <- NULL
dataBreast <- sapply(dataBreast,as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
dataBreast <- as.data.frame(dataBreast[complete.cases(dataBreast),])
table(dataBreast$status)
## 
##   0   1 
## 148  46

1.1 Exploring Raw Features with RRPlot

convar <- colnames(dataBreast)[lapply(apply(dataBreast,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataBreast[,c("status",convar)],"status")
pander::pander(topvar)
V35 V24 V34 V7 V16 V14 V17
0.0261 0.0261 0.0261 0.0623 0.126 0.126 0.126
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
topf <- topFive[1]
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataBreast$status,dataBreast[,topf]),
                              atRate=c(0.90,0.80),
                  timetoEvent=dataBreast$time,
                  title=topf,
#                  plotRR=FALSE
                  )
  idx <- idx + 1
}

names(RRanalysis) <- topFive

1.2 Reporting the Metrics

pander::pander(RRanalysis[[1]]$keyPoints,caption=topFive[1])
V35
  Thr RR RR_LCI RR_UCI SEN SPE BACC
@:0.9 1.00e+01 1.33 0.6783 2.63 0.152 0.8919 0.522
@:0.8 3.00e+00 2.32 1.4235 3.77 0.478 0.7770 0.628
@MAX_BACC 1.00e+00 2.54 1.4699 4.40 0.674 0.6216 0.648
@MAX_RR 9.38e-09 2.69 1.4190 5.11 0.783 0.4932 0.638
@SPE100 -9.57e-09 17.22 0.0364 8141.87 1.000 0.0473 0.524
pander::pander(RRanalysis[[2]]$keyPoints,caption=topFive[2])
V24
  Thr RR RR_LCI RR_UCI SEN SPE BACC
@:0.9 25.4 1.94 1.1306 3.34 0.239 0.8919 0.566
@:0.8 23.9 1.67 1.0015 2.78 0.348 0.7905 0.569
@MAX_BACC 20.3 2.45 1.3530 4.44 0.739 0.5270 0.633
@MAX_RR 16.6 3.87 0.9914 15.08 0.957 0.1824 0.569
@SPE100 15.5 33.04 0.0685 15945.00 1.000 0.0878 0.544
RRanalysis[[2]]$keyPoints["@MAX_BACC",c("BACC","RR")]
           BACC       RR

@MAX_BACC 0.6330787 2.451923

ROCAUC <- NULL
CstatCI <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
MAXBACC <- NULL
RREst <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
  MAXBACC <- rbind(MAXBACC,RRanalysis[[topf]]$keyPoints["@MAX_BACC",c("BACC")])
  RREst <- rbind(RREst,RRanalysis[[topf]]$keyPoints[1,c("RR")])
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
rownames(MAXBACC) <- topFive
rownames(RREst) <- topFive

pander::pander(ROCAUC)
  est lower upper
V35 0.648 0.553 0.742
V24 0.633 0.542 0.725
V34 0.658 0.572 0.745
V7 0.610 0.515 0.705
V16 0.598 0.504 0.692
pander::pander(CstatCI)
  mean.C Index median lower upper
V35 0.631 0.631 0.538 0.717
V24 0.677 0.677 0.599 0.752
V34 0.655 0.659 0.590 0.731
V7 0.666 0.665 0.579 0.737
V16 0.614 0.614 0.522 0.702
pander::pander(LogRangp)
V35 0.00104
V24 0.00938
V34 0.00282
V7 0.07332
V16 0.02135
pander::pander(Sensitivity)
  est lower upper
V35 0.152 0.0634 0.289
V24 0.239 0.1259 0.388
V34 0.152 0.0634 0.289
V7 0.152 0.0634 0.289
V16 0.109 0.0362 0.236
pander::pander(Specificity)
  est lower upper
V35 0.899 0.838 0.942
V24 0.899 0.838 0.942
V34 0.892 0.830 0.937
V7 0.899 0.838 0.942
V16 0.899 0.838 0.942
pander::pander(MAXBACC)
V35 0.648
V24 0.633
V34 0.642
V7 0.621
V16 0.614
pander::pander(RREst)
V35 1.33
V24 1.94
V34 1.33
V7 1.33
V16 1.00
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],RREst,Sensitivity[,1],Specificity[,1],MAXBACC)
colnames(meanMatrix) <- c("ROCAUC","C-Stat","RR","Sen","Spe","MAX_BACC")
pander::pander(meanMatrix)
  ROCAUC C-Stat RR Sen Spe MAX_BACC
V35 0.648 0.631 1.33 0.152 0.899 0.648
V24 0.633 0.677 1.94 0.239 0.899 0.633
V34 0.658 0.655 1.33 0.152 0.892 0.642
V7 0.610 0.666 1.33 0.152 0.899 0.621
V16 0.598 0.614 1.00 0.109 0.899 0.614

1.3 Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataBreast,NumberofRepeats = 10)

[+++++++++++++++++++++++++++++++++++++++++++++++]….

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy r.Accuracy
V24 5.15e-02 1.02 1.05 1.09 0.598 0.247
V26 5.32e-03 1.00 1.01 1.01 0.593 0.318
V27 2.07e-04 1.00 1.00 1.00 0.608 0.340
V34 9.98e-03 1.00 1.01 1.02 0.634 0.318
V7 5.53e-08 1.00 1.00 1.00 0.588 0.287
V35 5.52e-03 1.00 1.01 1.01 0.727 0.601
V6 1.15e-07 1.00 1.00 1.00 0.577 0.237
V4 1.12e-06 1.00 1.00 1.00 0.582 0.237
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI z.IDI
V24 0.598 0.609 0.503 0.609 0.0617 0.434 2.86
V26 0.597 0.598 0.523 0.601 0.0624 0.398 2.76
V27 0.609 0.608 0.530 0.606 0.0562 0.436 2.75
V34 0.628 0.618 0.524 0.616 0.0302 0.460 2.36
V7 0.590 0.595 0.515 0.598 0.0476 0.378 2.26
V35 0.616 0.641 0.603 0.606 0.0282 0.551 2.26
V6 0.577 0.588 0.500 0.588 0.0459 0.353 2.19
V4 0.582 0.606 0.500 0.606 0.0449 0.426 2.19
  z.NRI Delta.AUC Frequency
V24 2.65 0.1064 1.0
V26 2.41 0.0777 1.0
V27 2.64 0.0763 1.0
V34 2.78 0.0914 0.9
V7 2.29 0.0836 0.7
V35 3.41 0.0022 1.0
V6 2.13 0.0881 0.1
V4 2.62 0.1065 0.1

1.4 Cox Model Performance

Here we evaluate the model using the RRPlot() function.

1.4.1 The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events

index <- predict(ml,dataBreast)
timeinterval <- round(2*mean(subset(dataBreast,status==1)$time),0)

h0 <- sum(dataBreast$status & dataBreast$time <= timeinterval)
h0 <- h0/sum((dataBreast$time > timeinterval) | (dataBreast$status==1))
pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.323 51
rdata <- cbind(dataBreast$status,ppoisGzero(index,h0))
rownames(rdata) <- rownames(dataBreast)

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataBreast$time,
                     title="Raw Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

1.4.2 Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.43300 0.36528 0.2607 0.1755 1.59e-01 0.502776
RR 1.94392 1.83346 2.2857 3.4048 2.77e+01 2.307692
RR_LCI 1.13060 1.11141 1.3037 0.8778 5.75e-02 1.275480
RR_UCI 3.34229 3.02460 4.0074 13.2066 1.33e+04 4.175246
SEN 0.23913 0.36957 0.6957 0.9565 1.00e+00 0.152174
SPE 0.89189 0.79730 0.5608 0.1622 7.43e-02 0.952703
BACC 0.56551 0.58343 0.6282 0.5593 5.37e-01 0.552438
NetBenefit -0.00627 -0.00135 0.0468 0.0908 1.04e-01 -0.000396
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Test")
O/E Test
O/E Low Upper p.value
0.818 0.599 1.09 0.182
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1 1 0.948 1.06
pander::pander(t(rrAnalysisTrain$OARatio$estimate),caption="O/Acum Test")
O/Acum Test
O/A Low Upper p.value
0.943 0.69 1.26 0.774
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
0.916 0.916 0.908 0.923
pander::pander(t(rrAnalysisTrain$c.index$cstatCI),caption="C. Index")
C. Index
mean.C Index median lower upper
0.684 0.685 0.606 0.761
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.642 0.551 0.734
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.239 0.126 0.388
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.838 0.942
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% 80%
0.434 0.364
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 11.389095 on 2 degrees of freedom, p = 0.003364
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 147 29 37.05 1.750 9.17
class=1 21 6 4.33 0.643 0.72
class=2 26 11 4.62 8.833 9.91